{smcl}
{com}{sf}{ul off}{txt}{.-}
      name:  {res}<unnamed>
       {txt}log:  {res}C:\Users\hilar\Box\lights_2022\replication package\code\merge_pfaf4.smcl
  {txt}log type:  {res}smcl
 {txt}opened on:  {res}20 Dec 2025, 17:25:22
{txt}
{com}. timer on 1
{txt}
{com}. 
. 
. ** start with .05 degree data
. ** get pfa4 level data for these 
. use "../data/grid_lights_drought.dta", clear
{txt}
{com}. sort x y year
{txt}
{com}. 
. * merge cross-sectional data that's at .05 degree cell level
. merge m:1 x y using "../data/flares.dta"
{res}
{txt}{col 5}Result{col 33}Number of obs
{col 5}{hline 41}
{col 5}Not matched{col 30}{res}      58,436,271
{txt}{col 9}from master{col 30}{res}      58,432,836{txt}  (_merge==1)
{col 9}from using{col 30}{res}           3,435{txt}  (_merge==2)

{col 5}Matched{col 30}{res}         140,916{txt}  (_merge==3)
{col 5}{hline 41}

{com}. drop if _merge==2
{txt}(3,435 observations deleted)

{com}. gen flare=(detection_frequency_2012>0) & _merge==3
{txt}
{com}. drop _merge
{txt}
{com}. 
. summarize lights if flare==1, detail

                           {txt}lights
{hline 61}
      Percentiles      Smallest
 1%    {res}        0              0
{txt} 5%    {res}        0              0
{txt}10%    {res}        0              0       {txt}Obs         {res}     83,268
{txt}25%    {res} .1111111              0       {txt}Sum of wgt. {res}     83,268

{txt}50%    {res}        6                      {txt}Mean          {res} 12.67336
                        {txt}Largest       Std. dev.     {res} 15.98655
{txt}75%    {res} 18.97222             63
{txt}90%    {res} 39.11111             63       {txt}Variance      {res} 255.5698
{txt}95%    {res} 49.83333             63       {txt}Skewness      {res} 1.427633
{txt}99%    {res} 60.52778             63       {txt}Kurtosis      {res} 4.099532
{txt}
{com}. replace lights=. if flare
{txt}(83,268 real changes made, 83,268 to missing)

{com}. replace ihslights=. if flare
{txt}(83,268 real changes made, 83,268 to missing)

{com}. 
. 
. *drop flare cells in the same manner as in cell-level data
. * x_10/y_10 index the 10x10 block of fine cells that contains each (x,y)
. * NOTE(review): flaremin is the block minimum of flare, so flaremin>0 only when every
. * cell-year in the block is flagged; the two replaces that follow report 0 changes.
. * If blocks containing ANY flare were meant to be blanked this would need max(flare) --
. * confirm against the cell-level code, which is not visible here.
. gen x_10=floor((x-1)/10) + 1
{txt}
{com}. gen y_10=floor((y-1)/10) + 1 
{txt}
{com}. bys x_10 y_10: egen flaremin=min(flare)
{txt}
{com}. replace lights=. if flaremin>0
{txt}(0 real changes made)

{com}. replace ihslights=. if flaremin>0
{txt}(0 real changes made)

{com}. 
. 
. merge m:1 x y using "../data/pfaf4_grid.dta"
{res}
{txt}{col 5}Result{col 33}Number of obs
{col 5}{hline 41}
{col 5}Not matched{col 30}{res}       4,498,414
{txt}{col 9}from master{col 30}{res}       3,552,240{txt}  (_merge==1)
{col 9}from using{col 30}{res}         946,174{txt}  (_merge==2)

{col 5}Matched{col 30}{res}      55,021,512{txt}  (_merge==3)
{col 5}{hline 41}

{com}. drop _merge
{txt}
{com}. 
. collapse (mean) lights ihslights drought (count) cellsinpfaf4=lights, by(conpfaf4 year)
{res}{txt}
{com}. 
. 
. label var cellsinpfaf4 "Num 0.05 degree cells in pfa4"
{txt}
{com}. 
. tempfile fine
{txt}
{com}. 
. save `fine', replace
{txt}{p 0 4 2}
(file {bf}
C:\Users\hilar\AppData\Local\Temp\ST_61e8_000001.tmp{rm}
not found)
{p_end}
{p 0 4 2}
file {bf}
C:\Users\hilar\AppData\Local\Temp\ST_61e8_000001.tmp{rm}
saved
as .dta format
{p_end}

{com}. 
. * now get .5 degree data
. use  "../data/pfaf4_grid.dta", clear
{txt}
{com}. 
. gen x_10=floor((x-1)/10) + 1
{txt}
{com}. gen y_10=floor((y-1)/10) + 1 
{txt}
{com}. 
. *pick pfaf4 that is most common in .5 degree grid cell
. bys x_10 y_10: egen pfaf4mode=mode(conpfaf4), maxmode 
{txt}
{com}. 
. collapse (firstnm) conpfaf4=pfaf4mode, by(x_10 y_10)
{res}{txt}
{com}. 
. 
. rename x_10 x
{res}{txt}
{com}. rename y_10 y
{res}{txt}
{com}. 
. sort x y
{txt}
{com}. 
. 
. * add temp data at half degree level
. merge 1:m x y using "../data/annual_av_tmp.dta"
{res}
{txt}{col 5}Result{col 33}Number of obs
{col 5}{hline 41}
{col 5}Not matched{col 30}{res}       1,607,304
{txt}{col 9}from master{col 30}{res}               0{txt}  (_merge==1)
{col 9}from using{col 30}{res}       1,607,304{txt}  (_merge==2)

{col 5}Matched{col 30}{res}         725,496{txt}  (_merge==3)
{col 5}{hline 41}

{com}. drop if _merge==2
{txt}(1,607,304 observations deleted)

{com}. drop _merge
{txt}
{com}. 
. sort y x year
{txt}
{com}. 
. ** merge sc-PDSI data
. merge 1:1 y x year using "../data/pdsi.dta", sorted
{res}
{txt}{col 5}Result{col 33}Number of obs
{col 5}{hline 41}
{col 5}Not matched{col 30}{res}       2,579,304
{txt}{col 9}from master{col 30}{res}               0{txt}  (_merge==1)
{col 9}from using{col 30}{res}       2,579,304{txt}  (_merge==2)

{col 5}Matched{col 30}{res}         725,496{txt}  (_merge==3)
{col 5}{hline 41}

{com}. drop if _merge==2
{txt}(2,579,304 observations deleted)

{com}. drop _merge
{txt}
{com}. 
. 
. * add WB groundwater data
. 
. * use WB definition of center of cells
. gen lon=-179.75+.5*(x-1)
{txt}
{com}. gen lat=74.75-.5*(y-1)
{txt}
{com}. 
. 
. label var lon "Longitude (midpoint)"
{txt}
{com}. label var lat "Latitude (midpoint)"
{txt}
{com}. 
. 
. sort lon lat
{txt}
{com}. merge m:1 lon lat using "../data/aqtyp_gwresource_grid05deg.dta"
{res}{txt}{p 0 7 2}
(variable
{bf:lat} was {bf:float}, now {bf:double} to accommodate using data's values)
{p_end}
{p 0 7 2}
(variable
{bf:lon} was {bf:float}, now {bf:double} to accommodate using data's values)
{p_end}

{col 5}Result{col 33}Number of obs
{col 5}{hline 41}
{col 5}Not matched{col 30}{res}          11,891
{txt}{col 9}from master{col 30}{res}           2,112{txt}  (_merge==1)
{col 9}from using{col 30}{res}           9,779{txt}  (_merge==2)

{col 5}Matched{col 30}{res}         723,384{txt}  (_merge==3)
{col 5}{hline 41}

{com}. drop if _merge==2
{txt}(9,779 observations deleted)

{com}. drop _merge
{txt}
{com}. 
. 
. 
. *** now collapse to pfaf4 level
. 
. sort conpfaf4 year
{txt}
{com}. 
. collapse (mean) pdsi* *tmp aqtyp_pct* resource*, by(conpfaf4 year)
{res}{txt}
{com}. 
. 
. ** combine with more disaggregated data
. 
. merge 1:1 conpfaf4 year using `fine'
{res}
{txt}{col 5}Result{col 33}Number of obs
{col 5}{hline 41}
{col 5}Not matched{col 30}{res}          45,845
{txt}{col 9}from master{col 30}{res}           5,676{txt}  (_merge==1)
{col 9}from using{col 30}{res}          40,169{txt}  (_merge==2)

{col 5}Matched{col 30}{res}         123,888{txt}  (_merge==3)
{col 5}{hline 41}

{com}. drop if _merge==1
{txt}(5,676 observations deleted)

{com}. drop _merge
{txt}
{com}. 
. * add dams data from spatial join
. merge m:1 conpfaf4 using "../data/pfaf4_char.dta"
{res}{txt}{p 0 7 2}
(variable
{bf:conpfaf4} was {bf:float}, now {bf:double} to accommodate using data's values)
{p_end}

{col 5}Result{col 33}Number of obs
{col 5}{hline 41}
{col 5}Not matched{col 30}{res}             189
{txt}{col 9}from master{col 30}{res}             100{txt}  (_merge==1)
{col 9}from using{col 30}{res}              89{txt}  (_merge==2)

{col 5}Matched{col 30}{res}         163,957{txt}  (_merge==3)
{col 5}{hline 41}

{com}. drop if _merge==2
{txt}(89 observations deleted)

{com}. drop _merge
{txt}
{com}. 
. 
. 
. *create information on lights by pfaf4 over time so can exclude unpopulated places
. bysort conpfaf4: egen minlights=min(lights)
{txt}(709 missing values generated)

{com}.  
. label var minlights "Min (over time) lights for subbasin" 
{txt}
{com}. 
. drop if minlights==0 | missing(minlights)
{txt}(68,364 observations deleted)

{com}. 
. 
. ** all labels after collapse
. 
. label var lights "Light index"
{txt}
{com}. label var drought "Remote-sensed DSI"
{txt}
{com}. label var conpfaf4 "Pfaf4, unique by continent"
{txt}
{com}. label var tmp "Annual av temp"
{txt}
{com}. label var dev_tmp "Temp anomaly"
{txt}
{com}. 
. 
. *11 bins
. generate dsi_cat_temp=recode(drought,-1.5,-1.2,-.9,-.6,-.3,.3,.6,.9,1.2,1.5,4)
{txt}(3,881 missing values generated)

{com}. egen byte dsi_cat11=group(dsi_cat_temp)
{txt}(3,881 missing values generated)

{com}. drop dsi_cat_temp
{txt}
{com}. *7 bins
. gen dsi_cat7=dsi_cat11
{txt}(3,881 missing values generated)

{com}. * Stata treats missing as larger than any number, so exclude it explicitly;
. * otherwise obs with missing drought are recoded to 7 ("Wet")
. replace dsi_cat7=7 if dsi_cat11>=7 & !missing(dsi_cat11)
{txt}(26,137 real changes made)

{com}. * leave the indicators missing (not 0) where the DSI category is missing
. gen extsev=(dsi_cat11<=2) if !missing(dsi_cat11)
{txt}
{com}. gen extreme=(dsi_cat11==1) if !missing(dsi_cat11)
{txt}
{com}. 
. 
. 
. label define cat11 1 "Extreme drought"  2 "Severe drought" 3 "Moderate drought" 4 "Mild drought" ///
> 5 "Incipient drought" 6 "Near normal" 7 "Incipient wet spell"  8 "Slightly wet" 9 "Moderately wet" ///
> 10 "Very wet"  11 "Extremely wet"
{txt}
{com}. label values dsi_cat11 cat11
{txt}
{com}. label define cat7 1 "Extreme drought"  2 "Severe drought" 3 "Moderate drought" 4 "Mild drought" ///
> 5 "Incipient drought" 6 "Near normal" 7 "Wet"  
{txt}
{com}. label values dsi_cat7 cat7
{txt}
{com}. label var dsi_cat11 "DSI, 11 categories"
{txt}
{com}. label var dsi_cat7 "DSI, 7 categories"
{txt}
{com}. label var extsev "Extreme or severe drought"
{txt}
{com}. 
. label var aqtyp_pct_ma "Major alluvial (%)" 
{txt}
{com}. label var aqtyp_pct_cx "Complex aquifer (%)" 
{txt}
{com}. label var aqtyp_pct_kt  "Karstic (%)"
{txt}
{com}. label var aqtyp_pct_ls "Local/shallow aquifer (%)"
{txt}
{com}. label var aqtyp_pct_NA "No aquifer data (%)" 
{txt}
{com}. label var resource_pct_grid_na "Not covered by country-lithological-outcrop resource data (% of grid cell)"
{txt}
{com}. label var resource "GW resource, sum w/in cell (10^9 m3/yr)" 
{txt}
{com}. label var resource_norm "GW resource, normalized (10^9 m3/yr)"
{txt}
{com}. 
. 
. *using definitions in van der Schrier article
. generate pdsi_cat_temp=recode(pdsi_av,-4,-3,-2,-1,-.5,.5,1,2,3,4,30)
{txt}(20,777 missing values generated)

{com}. gen pdsi_summer_cat_temp=recode(pdsi_summer,-4,-3,-2,-1,-.5,.5,1,2,3,4,30)
{txt}(20,777 missing values generated)

{com}. * categorize the annual-minimum series (was recoding pdsi_summer, duplicating pdsi_summer_cat)
. * NOTE(review): assumes pdsi_min is one of the pdsi* variables kept by the collapse -- confirm
. gen pdsi_min_cat_temp=recode(pdsi_min,-4,-3,-2,-1,-.5,.5,1,2,3,4,30)
{txt}(20,777 missing values generated)

{com}. 
. *matching distribution of cut points in remote sensed DSI
. *_pctile pdsi_av, percentiles(4, 8, 15, 25, 36, 63, 75, 85, 92, 97)
. *generate pdsi_cat_temp=recode(pdsi_av,`r(r1)',`r(r2)',`r(r3)',`r(r4)',`r(r5)',`r(r6)',`r(r7)',`r(r8)',`r(r9)',`r(r10)',30)
. egen byte pdsi_cat=group(pdsi_cat_temp)
{txt}(20,777 missing values generated)

{com}. egen byte pdsi_summer_cat=group(pdsi_summer_cat_temp)
{txt}(20,777 missing values generated)

{com}. egen byte pdsi_min_cat=group(pdsi_min_cat_temp)
{txt}(20,777 missing values generated)

{com}. 
. * pdsi_cat_*temp matched only pdsi_cat_temp; this pattern also removes the summer and min helpers
. drop pdsi*_cat_temp
{txt}
{com}. 
. label var pdsi_cat "sc-PDSI categories"
{txt}
{com}. label var pdsi_summer_cat "sc-PDSI categories in June or Dec"
{txt}
{com}. label var pdsi_min_cat "sc-PDSI categories at annual min"
{txt}
{com}. 
. label define pcat 1 "Extremely dry"  2 "Severely dry" 3 "Moderately dry" 4 "Mildly dry" ///
> 5 "Incipient drought" 6 "Near normal" 7 "Incipient wet spell"  8 "Slightly wet" 9 "Moderately wet" ///
> 10 "Very wet"  11 "Extremely wet"
{txt}
{com}. label values pdsi_cat pcat
{txt}
{com}. label values pdsi_summer_cat pcat
{txt}
{com}. label values pdsi_min_cat pcat
{txt}
{com}. 
. 
. compress
  {txt}variable {bf}{res}dsi_cat7{sf}{txt} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}{res}extsev{sf}{txt} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}{res}extreme{sf}{txt} was {bf}{res}float{sf}{txt} now {bf}{res}byte{sf}
  {txt}variable {bf}{res}conpfaf4{sf}{txt} was {bf}{res}double{sf}{txt} now {bf}{res}long{sf}
{txt}  (1,244,009 bytes saved)

{com}. 
. label data "Data at the Pfaf4 (not grid cell) level"
{txt}
{com}. 
. 
. save for_reg_pfaf4, replace
{txt}{p 0 4 2}
(file {bf}
for_reg_pfaf4.dta{rm}
not found)
{p_end}
{p 0 4 2}
file {bf}
for_reg_pfaf4.dta{rm}
saved
{p_end}

{com}. 
. timer off 1
{txt}
{com}. timer list 1
{res}   1:     71.34 /        1 =      71.3420
{txt}
{com}. log close
      {txt}name:  {res}<unnamed>
       {txt}log:  {res}C:\Users\hilar\Box\lights_2022\replication package\code\merge_pfaf4.smcl
  {txt}log type:  {res}smcl
 {txt}closed on:  {res}20 Dec 2025, 17:26:33
{txt}{.-}
{smcl}
{txt}{sf}{ul off}